For fragments passing filter and TSS fragments. Will be further qualified by hashtags below.
########## Filter the barcodes
cat("*** Reading barcode statistics files \n")
## *** Reading barcode statistics files
metadata_file <- paste0(outsPath, '/singlecell.csv')
if (!file.exists(metadata_file)) {stop(paste0("Metadata file does not exist: ",metadata_file))}
metadata = read.csv(metadata_file, header = 1)
metadata = metadata[2:nrow(metadata),]
metadata$logUMI = log10(metadata$passed_filters + 1)
metadata$promoter_ratio = (metadata$promoter_region_fragments+1) / (metadata$passed_filters + 1)
metadata$peak_region_ratio = (metadata$peak_region_fragments+1) / (metadata$passed_filters + 1)
metadata$enhancer_region_ratio = (metadata$enhancer_region_fragments+1) / (metadata$passed_filters + 1)
metadata$FRiP<-metadata$peak_region_ratio*100
metadata$is_cellranger_cell_barcode<-metadata$is_mm10_cell_barcode
#Subset by thresholds:
metadata<- metadata %>%
mutate(., is__cell_barcode = ifelse(
# passed_filters > cutoff_reads_min,
#(
passed_filters > 10^cutoff_reads_min &
#| passed_filters_GRCh38 > 10^cutoff_reads_min) &
enhancer_region_ratio > 0.2 &
passed_filters < 10^cutoff_reads_max,
"1", "0"))
table("MyFilter"=metadata$is__cell_barcode, "cellranger"=metadata$is_cellranger_cell_barcode)
## cellranger
## MyFilter 0 1
## 0 242371 2
## 1 348 90
#############
p1 <- ggplot(data = metadata,aes(x=logUMI,y=enhancer_region_ratio,col=factor(is_cellranger_cell_barcode))) +
geom_point(size=0.1) +
scale_color_manual(values=c("black","gold"),labels=c(paste("TRUE",sum(as.numeric(as.character(metadata$is_cellranger_cell_barcode)))),"FALSE")) +
theme(legend.title = element_blank())+
theme(legend.position="bottom",text=element_text(size=20)) +ggtitle("Cellranger cells")+ylim(c(0,0.5))
p2<-ggplot(data = metadata,aes(x=logUMI,y=enhancer_region_ratio,col=factor(is__cell_barcode))) +
geom_point(size=0.1) +
scale_color_manual(values=c("black","gold"),
labels=c(paste("TRUE",sum(as.numeric(as.character(metadata$is__cell_barcode)))),
"FALSE")) +
theme(legend.position="bottom",text=element_text(size=20)) +
geom_hline(yintercept = cutoff_enhancer_ratio, linetype="dashed",col="red")+
geom_vline(xintercept = (cutoff_reads_min), linetype="dashed",col="red" )+
geom_vline(xintercept = (cutoff_reads_max), linetype="dashed",col="red" )+
ggtitle("Cell calling by Enhancer Region Fragments")+
theme(legend.title = element_blank())+ylim(c(0,0.5))
p1+p2 & theme(legend.position = 'bottom')
## Warning: Removed 181260 rows containing missing values (geom_point).
## Warning: Removed 181260 rows containing missing values (geom_point).
#plot_layout(guides = "collect")
####FrIP:
p_fripCellranger<-ggplot(data = metadata,aes(x=log10(passed_filters),y=FRiP,col=factor(is_cellranger_cell_barcode))) +
geom_point(size=0.1) +
scale_color_manual(values=c("black","gold"),
labels=c(paste("TRUE",sum(as.numeric(as.character(metadata$is_cellranger_cell_barcode)))),
"FALSE")) +
ggtitle("Filtered by passed_filters fragments")+
theme(legend.title = element_blank())
p_frip<-ggplot(data = metadata,aes(x=log10(passed_filters),y=FRiP,col=factor(is__cell_barcode))) +
geom_point(size=0.1) +
scale_color_manual(values=c("black","gold"),
labels=c(paste("TRUE",sum(as.numeric(as.character(metadata$is__cell_barcode)))),
"FALSE")) +
ggtitle("Filtered by passed_filters fragments")+
theme(legend.title = element_blank())
p_fripCellranger+p_frip& theme(legend.position = 'bottom')
#plot_layout(guides = "collect")
ggplot(data = metadata,aes(x=log10(passed_filters_mm10),y=log10(passed_filters_GRCh38),col=factor(is__cell_barcode))) +
geom_point(size=0.1) +
scale_color_manual(values=c("black","gold"),
labels=c(paste("TRUE",sum(as.numeric(as.character(metadata$is__cell_barcode)))),
"FALSE")) +
ggtitle("Filtered by passed_filters fragments")+
theme(legend.title = element_blank())
ggplot(data = metadata,aes(x=log10(passed_filters_mm10),y=FRiP,col=factor(is__cell_barcode))) +
geom_point(size=0.1) +
scale_color_manual(values=c("black","gold"),
labels=c(paste("TRUE",sum(as.numeric(as.character(metadata$is__cell_barcode)))),
"FALSE")) +
ggtitle("Filtered by passed_filters fragments")+
theme(legend.title = element_blank())
saveRDS(metadata, paste0(projPath,"/", ExptName, "_metadataObject.RDS"))
dir.create(paste0(projPath,"/bam"))
## Warning in dir.create(paste0(projPath, "/bam")): '/nfsdata/projects/jph712/
## CUTnTag/H3K27ac_ExptFG/Expt_F/bam' already exists
cells<-
# seur_hto@meta.data %>% rownames_to_column("barcode") %>%
# mutate(., is__cell_barcode = ifelse(
# HTO_classification.global =="Singlet",
# "1", "0")) %>%
metadata %>%
filter(is__cell_barcode=="1")%>%
dplyr::select(barcode, is__cell_barcode)
cells$is__cell_barcode<-paste0(ExptName, "_FilteredCells")
write_tsv(x = cells, file=paste0(projPath,"/bam/FilteredCellBarcodes.tsv"), col_names = FALSE)
sessionInfo()
## R version 4.0.2 (2020-06-22)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Red Hat Enterprise Linux
##
## Matrix products: default
## BLAS/LAPACK: /usr/lib64/libopenblas-r0.3.3.so
##
## locale:
## [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
## [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
## [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
## [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
## [9] LC_ADDRESS=C LC_TELEPHONE=C
## [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
##
## attached base packages:
## [1] stats4 parallel stats graphics grDevices utils datasets
## [8] methods base
##
## other attached packages:
## [1] patchwork_1.1.0 GenomeInfoDb_1.26.2 IRanges_2.24.1
## [4] S4Vectors_0.28.1 BiocGenerics_0.36.0 Signac_1.1.0
## [7] Seurat_3.2.2 forcats_0.5.0 stringr_1.4.0
## [10] dplyr_1.0.2 purrr_0.3.4 readr_1.4.0
## [13] tidyr_1.1.2 tibble_3.0.4 ggplot2_3.3.2
## [16] tidyverse_1.3.0
##
## loaded via a namespace (and not attached):
## [1] reticulate_1.18 tidyselect_1.1.0
## [3] RSQLite_2.2.1 AnnotationDbi_1.52.0
## [5] htmlwidgets_1.5.2 grid_4.0.2
## [7] BiocParallel_1.24.1 Rtsne_0.15
## [9] munsell_0.5.0 codetools_0.2-18
## [11] ica_1.0-2 future_1.20.1
## [13] miniUI_0.1.1.1 withr_2.3.0
## [15] colorspace_2.0-0 OrganismDbi_1.32.0
## [17] Biobase_2.50.0 knitr_1.30
## [19] rstudioapi_0.13 ROCR_1.0-11
## [21] tensor_1.5 listenv_0.8.0
## [23] labeling_0.4.2 MatrixGenerics_1.2.0
## [25] GenomeInfoDbData_1.2.4 polyclip_1.10-0
## [27] farver_2.0.3 bit64_4.0.5
## [29] parallelly_1.21.0 vctrs_0.3.5
## [31] generics_0.1.0 xfun_0.19
## [33] BiocFileCache_1.14.0 biovizBase_1.38.0
## [35] lsa_0.73.2 ggseqlogo_0.1
## [37] R6_2.5.0 rsvd_1.0.3
## [39] AnnotationFilter_1.14.0 reshape_0.8.8
## [41] bitops_1.0-6 spatstat.utils_1.17-0
## [43] DelayedArray_0.16.0 assertthat_0.2.1
## [45] promises_1.1.1 scales_1.1.1
## [47] nnet_7.3-14 gtable_0.3.0
## [49] globals_0.14.0 goftest_1.2-2
## [51] ggbio_1.38.0 ensembldb_2.14.0
## [53] rlang_0.4.9 RcppRoll_0.3.0
## [55] splines_4.0.2 rtracklayer_1.50.0
## [57] lazyeval_0.2.2 dichromat_2.0-0
## [59] broom_0.7.2 checkmate_2.0.0
## [61] BiocManager_1.30.10 yaml_2.2.1
## [63] reshape2_1.4.4 abind_1.4-5
## [65] modelr_0.1.8 GenomicFeatures_1.42.1
## [67] backports_1.2.0 httpuv_1.5.4
## [69] Hmisc_4.4-2 RBGL_1.66.0
## [71] tools_4.0.2 ellipsis_0.3.1
## [73] RColorBrewer_1.1-2 ggridges_0.5.2
## [75] Rcpp_1.0.5 plyr_1.8.6
## [77] base64enc_0.1-3 progress_1.2.2
## [79] zlibbioc_1.36.0 RCurl_1.98-1.2
## [81] prettyunits_1.1.1 openssl_1.4.3
## [83] rpart_4.1-15 deldir_0.2-3
## [85] pbapply_1.4-3 cowplot_1.1.0
## [87] zoo_1.8-8 SummarizedExperiment_1.20.0
## [89] haven_2.3.1 ggrepel_0.8.2
## [91] cluster_2.1.0 fs_1.5.0
## [93] magrittr_2.0.1 data.table_1.13.4
## [95] lmtest_0.9-38 reprex_0.3.0
## [97] RANN_2.6.1 SnowballC_0.7.0
## [99] ProtGenerics_1.22.0 fitdistrplus_1.1-3
## [101] matrixStats_0.57.0 hms_0.5.3
## [103] mime_0.9 evaluate_0.14
## [105] xtable_1.8-4 XML_3.99-0.5
## [107] jpeg_0.1-8.1 readxl_1.3.1
## [109] gridExtra_2.3 compiler_4.0.2
## [111] biomaRt_2.46.0 KernSmooth_2.23-18
## [113] crayon_1.3.4 htmltools_0.5.0
## [115] mgcv_1.8-33 later_1.1.0.1
## [117] Formula_1.2-4 lubridate_1.7.9.2
## [119] DBI_1.1.0 tweenr_1.0.1
## [121] dbplyr_2.0.0 rappdirs_0.3.1
## [123] MASS_7.3-53 Matrix_1.2-18
## [125] cli_2.2.0 igraph_1.2.6
## [127] GenomicRanges_1.42.0 pkgconfig_2.0.3
## [129] GenomicAlignments_1.26.0 foreign_0.8-80
## [131] plotly_4.9.2.1 xml2_1.3.2
## [133] XVector_0.30.0 rvest_0.3.6
## [135] VariantAnnotation_1.36.0 digest_0.6.27
## [137] sctransform_0.3.1 RcppAnnoy_0.0.17
## [139] graph_1.68.0 spatstat.data_1.5-2
## [141] Biostrings_2.58.0 fastmatch_1.1-0
## [143] rmarkdown_2.5 cellranger_1.1.0
## [145] leiden_0.3.6 htmlTable_2.1.0
## [147] uwot_0.1.9 curl_4.3
## [149] shiny_1.5.0 Rsamtools_2.6.0
## [151] lifecycle_0.2.0 nlme_3.1-150
## [153] jsonlite_1.7.2 BSgenome_1.58.0
## [155] askpass_1.1 viridisLite_0.3.0
## [157] fansi_0.4.1 pillar_1.4.7
## [159] GGally_2.0.0 lattice_0.20-41
## [161] fastmap_1.0.1 httr_1.4.2
## [163] survival_3.2-7 glue_1.4.2
## [165] spatstat_1.64-1 png_0.1-7
## [167] bit_4.0.4 ggforce_0.3.2
## [169] stringi_1.5.3 blob_1.2.1
## [171] latticeExtra_0.6-29 memoise_1.1.0
## [173] irlba_2.3.3 future.apply_1.6.0